Problem Set 2

Autor/a

Jorge Alejandro Quiroga Aguilar

1 Cargar librerías

Código
# Instalar y cargar pacman si no está
if (!require(pacman)) install.packages("pacman")

# Vector con todos tus paquetes
paquetes <- c(
  # Bases y estadísticas
  "datasets", "stats", "MASS", "boot", "class",
  # Modelado y ML
  "ISLR", "glmnet", "tree", "gbm", "randomForest", "caret",
  # Evaluación de modelos
  "ROCR",
  # Manipulación y wrangling
  "dplyr", "tidyr", "readr", "stringr", "lubridate", "forcats", "glue",
  # Visualización
  "ggplot2", "plotly", "ggiraph", "mapview",
  # Reportes y tablas
  "DT", "gt", "knitr",
  # Geoespacial
  "sf", "terra",
  # Tidyverse completo
  "tidyverse", "modelsummary"
)

# Cargar todo de una vez
pacman::p_load(char = paquetes)
package 'data.table' successfully unpacked and MD5 sums checked
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages

2 (30 pts) Part A: Deforestation in one municipality

2.1 (2.5pts) Load the Loss year raster you downloaded from Global Forest Watch. What is the CRS of the downloaded raster?

Código
#| label: cargar-raster
# Cargar el raster de pérdida de bosque
defo_raster <- rast("Hansen_GFC-2022-v1.10_lossyear_10N_080W.tif")

# Mostrar el raster
plot(defo_raster)

Código
# Extraer el código EPSG usando las funciones del paquete terra
crs_info <- crs(defo_raster, describe = TRUE)
epsg_code <- crs_info$code

print(glue("El CRS del raster es EPSG:{epsg_code}"))
El CRS del raster es EPSG:4326

2.2 (5pts) Make a single plot with the downloaded raster and Antioquia’s state shape file. Restrict the raster to Antioquia’s boundary.

Código
##########################################
#volver a cargar librerias y Rast para que funciones gneeración de archivo con eliminación de cache
pacman::p_load(char = paquetes)
Installing package into 'C:/Users/jorge/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
also installing the dependency 'data.table'
Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5:
  cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5/PACKAGES'
package 'data.table' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'data.table'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\jorge\AppData\Local\R\win-library\4.5\00LOCK\data.table\libs\x64\data_table.dll
to
C:\Users\jorge\AppData\Local\R\win-library\4.5\data.table\libs\x64\data_table.dll:
Permission denied
Warning: restored 'data.table'
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages

modelsummary installed
Warning: package 'modelsummary' was built under R version 4.5.1
Warning in pacman::p_load(char = paquetes): Failed to install/load:
modelsummary
Código
defo_raster <- rast("Hansen_GFC-2022-v1.10_lossyear_10N_080W.tif") # volver a cargar para que funcione el puntero extero de markdown
##########################################


# Cargar el shapefile de Antioquia

Antioquia_shape <- read_sf("Antioquia/Antioquia.shp")

Antioquia_shape <- terra::vect(Antioquia_shape)    # <- SpatVector para que funcione Rmarckdown

raster_crop_Antioquia <- crop(defo_raster, Antioquia_shape)
raster_mask_Antioquia <- mask(raster_crop_Antioquia, Antioquia_shape) 

|---------|---------|---------|---------|
=========================================
                                          
Código
plot(raster_crop_Antioquia) # plot raster_crop

Código
# plot(Antioquia_shape, add = T) # agregar capa antioquia
plot(raster_mask_Antioquia, main = "Deforestación Antioquia") # plot raster_mask

2.3 (22.5 pts) Compute deforestation in your municipality.

2.3.1 (a) (5 pts) Make a single plot with the downloaded raster and the shape of the municipality assigned to you in the previous table (Only show the raster values inside your municipality).

Código
##########################################
#volver a cargar librerias y Rast para que funciones gneeración de archivo con eliminación de cache
pacman::p_load(char = paquetes)
Installing package into 'C:/Users/jorge/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
also installing the dependency 'data.table'
Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5:
  cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5/PACKAGES'
package 'data.table' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'data.table'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\jorge\AppData\Local\R\win-library\4.5\00LOCK\data.table\libs\x64\data_table.dll
to
C:\Users\jorge\AppData\Local\R\win-library\4.5\data.table\libs\x64\data_table.dll:
Permission denied
Warning: restored 'data.table'
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages

modelsummary installed
Warning: package 'modelsummary' was built under R version 4.5.1
Warning in pacman::p_load(char = paquetes): Failed to install/load:
modelsummary
Código
defo_raster <- rast("Hansen_GFC-2022-v1.10_lossyear_10N_080W.tif") # volver a cargar para que funcione el puntero extero de markdown
##########################################


# Cargar Shape La Union
muni_shp <- read_sf("Antioquia/Antioquia.shp") %>% 
             filter(MPIO_CNMBR == "LA UNIÓN")
muni_shp <- terra::vect(muni_shp)    # <- SpatVector para que funcione Rmarckdown


# Crop and mask the raster
raster_crop <- crop(defo_raster, muni_shp) # crops to the EXTENT
raster_mask <- mask(raster_crop, muni_shp) # mask FOLLOWS EXACT SHAPE
plot(raster_crop) # plot raster_crop

Código
# plot(muni_shp, add = T) # add municipality
plot(raster_mask, main = "Deforestación La Unión")

2.3.2 (b) (10 pts) Re-project your municipality restricted raster to “epsg:32618” and produce a frequency table of the raster values inside your municipality.

Código
# Global variables
CRS = "epsg:32618"

# Reproyectar el raster restringido a EPSG:32618 (categoría => usar 'near')
raster_mask
class       : SpatRaster 
size        : 677, 549, 1  (nrow, ncol, nlyr)
resolution  : 0.00025, 0.00025  (x, y)
extent      : -75.4195, -75.28225, 5.8675, 6.03675  (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326) 
source(s)   : memory
varname     : Hansen_GFC-2022-v1.10_lossyear_10N_080W 
name        : Layer_1 
min value   :       0 
max value   :      22 
Código
r_utm <- terra::project(raster_mask, CRS, method = "near")

# Verificar la proyección
crs(r_utm)
[1] "PROJCRS[\"WGS 84 / UTM zone 18N\",\n    BASEGEOGCRS[\"WGS 84\",\n        ENSEMBLE[\"World Geodetic System 1984 ensemble\",\n            MEMBER[\"World Geodetic System 1984 (Transit)\"],\n            MEMBER[\"World Geodetic System 1984 (G730)\"],\n            MEMBER[\"World Geodetic System 1984 (G873)\"],\n            MEMBER[\"World Geodetic System 1984 (G1150)\"],\n            MEMBER[\"World Geodetic System 1984 (G1674)\"],\n            MEMBER[\"World Geodetic System 1984 (G1762)\"],\n            MEMBER[\"World Geodetic System 1984 (G2139)\"],\n            MEMBER[\"World Geodetic System 1984 (G2296)\"],\n            ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n                LENGTHUNIT[\"metre\",1]],\n            ENSEMBLEACCURACY[2.0]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        ID[\"EPSG\",4326]],\n    CONVERSION[\"UTM zone 18N\",\n        METHOD[\"Transverse Mercator\",\n            ID[\"EPSG\",9807]],\n        PARAMETER[\"Latitude of natural origin\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",-75,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"Scale factor at natural origin\",0.9996,\n            SCALEUNIT[\"unity\",1],\n            ID[\"EPSG\",8805]],\n        PARAMETER[\"False easting\",500000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"(E)\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1]],\n        AXIS[\"(N)\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1]],\n    USAGE[\n        SCOPE[\"Navigation and medium accuracy spatial referencing.\"],\n        AREA[\"Between 78°W and 72°W, northern hemisphere between equator and 84°N, onshore and offshore. Bahamas. Canada - Nunavut; Ontario; Quebec. Colombia. Cuba. Ecuador. Greenland. Haiti. Jamaica. Panama. Turks and Caicos Islands. United States (USA). Venezuela.\"],\n        BBOX[0,-78,84,-72]],\n    ID[\"EPSG\",32618]]"
Código
# Tabla de frecuencias de los valores del raster dentro del municipio
freq_tab<-as.data.frame(freq(r_utm, digits = 3))


# Tabla de frecuencias (value = lossyear; count = # celdas)
freq_tab <- rename(freq_tab, lossyear = value, n_cells = count) %>% 
            select(lossyear, n_cells) %>% 
            arrange(lossyear)

# Área por celda (m²) y por clase (ha) — EPSG:32618 usa metros
px <- terra::res(r_utm[[1]])               # c(xres, yres) en metros
px_m2 <- px[1] * px[2]
freq_tab$area_ha <- freq_tab$n_cells * px_m2 / 1e4

# Porcentaje dentro del muni (opcional, útil para el informe)
total_cells <- sum(freq_tab$n_cells)
total_ha    <- total_cells * px_m2 / 1e4
freq_tab$Porcentaje_cells <- 100 * freq_tab$n_cells / total_cells
freq_tab$Porcentaje_area  <- 100 * freq_tab$area_ha  / total_ha


# Resultado Tabla Frecuencias
knitr::kable(freq_tab, caption = "Tabla de Frecuencias de Deforestación en La Unión")
Tabla de Frecuencias de Deforestación en La Unión
lossyear n_cells area_ha Porcentaje_cells Porcentaje_area
0 217986 16665.054850 98.5127239 98.5127239
1 47 3.593155 0.0212403 0.0212403
2 51 3.898956 0.0230480 0.0230480
3 21 1.605452 0.0094904 0.0094904
4 176 13.455220 0.0795383 0.0795383
5 160 12.232019 0.0723076 0.0723076
6 201 15.366473 0.0908364 0.0908364
7 53 4.051856 0.0239519 0.0239519
8 81 6.192459 0.0366057 0.0366057
9 133 10.167865 0.0601057 0.0601057
10 74 5.657309 0.0334422 0.0334422
11 115 8.791763 0.0519711 0.0519711
12 124 9.479814 0.0560384 0.0560384
13 219 16.742575 0.0989710 0.0989710
14 104 7.950812 0.0469999 0.0469999
15 30 2.293503 0.0135577 0.0135577
16 188 14.372622 0.0849614 0.0849614
17 350 26.757540 0.1581728 0.1581728
18 316 24.158237 0.1428074 0.1428074
19 219 16.742575 0.0989710 0.0989710
20 192 14.678422 0.0867691 0.0867691
21 154 11.773318 0.0695960 0.0695960
22 283 21.635383 0.1278940 0.1278940

2.3.3 (c) (5 pts) How many pixels were “deforested” in 2019 in your municipality?

Código
# Número de píxeles deforestados en 2019 (lossyear = 19)
n_defor_2019 <- sum(r_utm[] == 19, na.rm = TRUE)
# n_defor_2019
print(glue("El número de píxeles deforestados en 2019 en La Unión es: {n_defor_2019}"))
El número de píxeles deforestados en 2019 en La Unión es: 219

2.3.4 (d) (2.5 pts) What is the area “deforested” in 2019 in your municipality?

Código
# numero de Hectarias deforestadas en 2019
area_defor_2019 <- n_defor_2019 * px_m2 / 1e4
# area_defor_2019
print(glue("El área deforestada en 2019 en La Unión es: {round(area_defor_2019,2)} ha"))
El área deforestada en 2019 en La Unión es: 16.74 ha

3 (70 pts) Part B: Estimate the eco-tourism treatment effect on deforestation. You will be “replicating” Column 3 of Table 4 from Saavedra(2025)“Economic development and environmental conservation: Evidence from eco-tourism”. Results will be slightly different because I added noise to the coordinates to preserve anonymity.

  • INPUTS
  • Points Ecoturism anonimized.csv: data from the filed survey including: latitude/longitude coordinates of the surveyed household/business (I added noise to the coordinates to preserve anonymity); municipality code (codmpio); eco-tourism promotion treatment indicator (treat = 1); randomization pair (pair)
  • Raster PartB.Forest: raster indicating whether a certain pixel is classified as forest(= 1) according to Colombia’s environmental institute.
  • Raster PartB.Loss: we saved you time by downloading, cropping and projecting the GFW rasters so you can focus on computing deforestation

3.1 4. (20 pts) Import Points Ecoturism anonimized.csv and make a shape file using the latitude longitude coordinates provided. Then generate a buffer of radius 2km around each point. Then compute the following data for the buffer of the X point

Código
##   
##   ### Unir Rasters Loss
##   
##   # --- 1. MODIFICA LA RUTA A TU CARPETA DE ENTRADA ---
##   # R en Windows también prefiere las barras inclinadas hacia adelante (/)
##   input_folder <- "D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001 exams/Problem_Set_2/Inputs/Raster_PartB/Loss"
##   
##   # --- 2. DEFINE LA RUTA Y EL NOMBRE DEL ARCHIVO DE SALIDA ---
##   output_file <- "D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001 ##   exams/Problem_Set_2/Inputs/Raster_PartB/Consolidados/LOSS_Raster_Unido_Final_R.tif"
##   
##   # --- 3. DEFINE EL VALOR NODATA ---
##   # Este es el valor que quieres usar
##   # nodata_value <- -3.4e+38
##   
##   # --- NO ES NECESARIO MODIFICAR NADA DEBAJO DE ESTA LÍNEA ---
##   
##   # Encontrar todos los archivos .tif dentro de la carpeta
##   # full.names = TRUE es importante para obtener la ruta completa
##   files_to_merge <- list.files(input_folder, pattern = "\\.tif$", full.names = TRUE)
##   
##   # Informar al usuario cuántos archivos se encontraron
##   cat("Se encontraron", length(files_to_merge), "archivos para unir.\n")
##   
##   # Crear un Raster Virtual (VRT). Es un paso súper rápido que no consume memoria
##   # terra es inteligente y generalmente lee el valor NoData de los archivos originales
##   vrt_raster <- vrt(files_to_merge)
##   
##   # Escribir el VRT a un archivo .tif final. Este es el paso que realiza la unión
##   # y puede tardar un poco.
##   writeRaster(vrt_raster, 
##               filename = output_file, 
##               overwrite = TRUE)
##               #gdal = c("COMPRESS=LZW")) # Opción para comprimir el archivo final
##   
##   cat("¡Proceso completado! El archivo unido se guardó en:", output_file, "\n")
##   
Código
##  ### Unir Rasters Forest
##  
##  # ---  1. MODIFICA LA RUTA A TU CARPETA DE ENTRADA ---
##  # R en Windows también prefiere las barras inclinadas hacia adelante (/)
##  input_folder <- "D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001 exams/Problem_Set_2/Inputs/Raster_PartB/Forest"
##  
##  # ---  2. DEFINE LA RUTA Y EL NOMBRE DEL ARCHIVO DE SALIDA ---
##  output_file <- "D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001  ## exams/Problem_Set_2/Inputs/Raster_PartB/Consolidados/FOREST_Raster_Unido_Final_R.tif"
##  
##  # ---  3. DEFINE EL VALOR NODATA ---
##  # Este es el valor que quieres usar
##  # nodata_value <- -3.4e+38
##  
##  # --- NO ES NECESARIO MODIFICAR NADA DEBAJO DE ESTA LÍNEA ---
##  
##  # Encontrar todos los archivos .tif dentro de la carpeta
##  # full.names = TRUE es importante para obtener la ruta completa
##  files_to_merge <- list.files(input_folder, pattern = "\\.tif$", full.names = TRUE)
##  
##  # Informar al usuario cuántos archivos se encontraron
##  cat("Se encontraron", length(files_to_merge), "archivos para unir.\n")
##  
##  # Crear un Raster Virtual (VRT). Es un paso súper rápido que no consume memoria
##  # terra es inteligente y generalmente lee el valor NoData de los archivos originales
##  vrt_raster <- vrt(files_to_merge)
##  
##  # Escribir el VRT a un archivo .tif final. Este es el paso que realiza la unión
##  # y puede tardar un poco.
##  writeRaster(vrt_raster, 
##              filename = output_file, 
##              overwrite = TRUE)
##              #gdal = c("COMPRESS=LZW")) # Opción para comprimir el archivo final
##  
##  cat("¡Proceso completado! El archivo unido se guardó en:", output_file, "\n")
##  
Código
### Cargar los rasters de bosque y pérdida
Forest <- rast("Inputs/Raster_PartB/Consolidados/FOREST_Raster_Unido_Final_R.tif")
Loss <- rast("Inputs/Raster_PartB/Consolidados/LOSS_Raster_Unido_Final_R.tif")
#

#But is because the rasters have different CRS
crs(Forest) == crs(Loss) # check if CRS is the same
[1] TRUE
Código
# Extraer el código EPSG usando las funciones del paquete terra
crs_info <- crs(Forest, describe = TRUE)
Forest_epsg_code <- crs_info$code
Forest_epsg_code
[1] "32618"
Código
crs_info <- crs(Loss, describe = TRUE)
Loss_epsg_code <- crs_info$code
Loss_epsg_code
[1] "32618"
Código
# Mostrar el raster
plot(Forest)

Código
plot(Loss)

3.1.1 (a) (7.5 pts) How many pixels were deforested in the buffer according to GFW in 2018? And in 2019?

Código
#| label: buffer-crop-mask_loss
#| cache: false
#| message: true
#| warning: true

##########################################
#volver a cargar librerias y Rast para que funciones gneeración de archivo con eliminación de cache
pacman::p_load(char = paquetes)
package 'data.table' successfully unpacked and MD5 sums checked
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages
Código
Forest <- rast("Inputs/Raster_PartB/Consolidados/FOREST_Raster_Unido_Final_R.tif")
Loss <- rast("Inputs/Raster_PartB/Consolidados/LOSS_Raster_Unido_Final_R.tif")# volver a cargar para que funcione el puntero extero de markdown
##########################################

# Global variables
CRSg = 32618 # The coordinate system to use
CRS = "epsg:32618" # The coordinate system to use (in terra raster CRS only the number is not a valid format)

# importar datos anonimizados

ecoturismo <- read.csv("D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001 exams/Problem_Set_2/Inputs/Points_Ecoturism_anonimized.csv")

# Coordinates provided by Google Maps are "EPSG:4326", but we use our flat CRS
ecoturismo_shp <- ecoturismo %>%
  st_as_sf(coords = c("lon", "lat"), crs = "EPSG:4326") %>%
  st_transform(crs = CRSg) # reproyectar a CRS plano

mapview (ecoturismo_shp) # visualizar puntos
Código
# Generar buffers de 2km alrededor de cada punto
ecoturismo_buffer <- st_buffer(ecoturismo_shp, dist = 2000)
ecoturismo_buffer <- terra::vect(ecoturismo_buffer)
# View(ecoturismo_buffer)
mapview(ecoturismo_buffer) # visualizar buffers
Código
# Then compute the following data for the buffer of the 7 point
# Forma correcta de filtrar un SpatVector
buffer_1800110 <- ecoturismo_buffer[ecoturismo_buffer$NIM == 1800110, ]
buffer_shp<-buffer_1800110

# buffer_shp <- terra::vect(buffer_shp) # convertir a SpatVector para que funcione con terra 

mapview(buffer_shp)
Código
plot(buffer_shp)

Código
# Crop and mask the raster
buffer_raster_crop <- crop(Loss, buffer_shp) # crops to the EXTENT
buffer_raster_mask <- mask(buffer_raster_crop, buffer_shp) # mask FOLLOWS EXACT SHAPE
plot(buffer_raster_crop) # plot raster_crop
plot(buffer_shp, add = T) # add municipality

Código
plot(buffer_raster_mask, main = "Deforestación buffer punto 1800110")

Código
buff_Loss_mask<-buffer_raster_mask

# Tabla de frecuencias de los valores del raster dentro del municipio
freq_buffer<-as.data.frame(freq(buff_Loss_mask, digits = 3))


# Tabla de frecuencias (value = lossyear; count = # celdas)
freq_buffer <- rename(freq_buffer, lossyear = value, n_cells = count) %>% 
            select(lossyear, n_cells) %>% 
            arrange(lossyear)

# Área por celda (m²) y por clase (ha) — EPSG:32618 usa metros
px <- terra::res(buff_Loss_mask[[1]])               # c(xres, yres) en metros
px_m2 <- px[1] * px[2]
freq_buffer$area_ha <- freq_buffer$n_cells * px_m2 / 1e4

# Porcentaje dentro del muni (opcional, útil para el informe)
total_cells <- sum(freq_buffer$n_cells)
total_ha    <- total_cells * px_m2 / 1e4
freq_buffer$Porcentaje_cells <- 100 * freq_buffer$n_cells / total_cells
freq_buffer$Porcentaje_area  <- 100 * freq_buffer$area_ha  / total_ha


# Resultado Tabla Frecuencias
# knitr::kable(freq_buffer, caption = "Tabla de Frecuencias de Deforestación en La Unión")

# Número de píxeles deforestados en 2018 (lossyear = 18)
n_defor_2018 <- sum(buff_Loss_mask[] == 18, na.rm = TRUE)
# n_defor_2018
print(glue("El número de píxeles deforestados en 2018 en el buffer del punto 1800110 es: {n_defor_2018}"))
El número de píxeles deforestados en 2018 en el buffer del punto 1800110 es: 11
Código
# Número de píxeles deforestados en 2019 (lossyear = 19)
n_defor_2019 <- sum(buff_Loss_mask[] == 19, na.rm = TRUE)
# n_defor_2019
print(glue("El número de píxeles deforestados en 2019 en el buffer del punto 1800110 es: {n_defor_2019}"))
El número de píxeles deforestados en 2019 en el buffer del punto 1800110 es: 0

3.1.2 (b) (7.5 pts) How many forest pixels are there in the buffer? (according to IDEAM, but using the same pixel resolution as GFW)

Código
#| label: buffer-crop-mask_forest
# #| cache: false
#| message: true
#| warning: true

##########################################
#volver a cargar librerias y Rast para que funciones gneeración de archivo con eliminación de cache
pacman::p_load(char = paquetes)
package 'data.table' successfully unpacked and MD5 sums checked
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages
Código
Forest <- rast("Inputs/Raster_PartB/Consolidados/FOREST_Raster_Unido_Final_R.tif")
Loss <- rast("Inputs/Raster_PartB/Consolidados/LOSS_Raster_Unido_Final_R.tif")# volver a cargar para que funcione el puntero extero de markdown
##########################################

# Global variables
CRSg = 32618 # The coordinate system to use
CRS = "epsg:32618" # The coordinate system to use (in terra raster CRS only the number is not a valid format)

#Resample o proyect validado

r1 = Forest # (IDEAM)
r2 = Loss   # (GFW) --  r2 SIEMPRE ES LA PLANTILLA COMO

# 1) Revisar si tienen el mismo CRS -----------------------
if (crs(r1) == crs(r2)) {
  message("Ambos rásters tienen el mismo CRS")
  
  # 2) Revisar si la resolución y la grilla son iguales
  if (all(res(r1) == res(r2)) && ext(r1) == ext(r2) && all.equal(dim(r1), dim(r2))) {
    message("Ya están alineados: no necesitas ni resample ni project")
    r1_alineado <- r1
    
  } else {
    message("CRS iguales pero distinta resolución o grilla → usar RESAMPLE")
    r1_alineado <- resample(r1, r2, method = "near") # usar near si son categorías
  }
  
} else {
  message("CRS diferentes → usar PROJECT")
  r1_alineado <- project(r1, r2, method = "near")
}

|---------|---------|---------|---------|
=========================================
                                          
Código
Forest <- r1_alineado  # Forest alineado a Loss



# # Verificar la proyección
# crs(r1_alineado)
# # Verificar la resolución
# res(r1_alineado)
# # Verificar la extensión
# ext(r1_alineado)



# Crop and mask the raster
buffer_raster_crop <- crop(Forest, buffer_shp) # crops to the EXTENT
buffer_raster_mask <- mask(buffer_raster_crop, buffer_shp) # mask FOLLOWS EXACT SHAPE
plot(buffer_raster_crop) # plot raster_crop
plot(buffer_shp, add = T) # add municipality

Código
plot(buffer_raster_mask, main = "Bosques buffer punto 1800110")

Código
buff_Forest_mask<-buffer_raster_mask

# Tabla de frecuencias de los valores del raster dentro del municipio
freq_buffer<-as.data.frame(freq(buff_Forest_mask, digits = 3))


# Tabla de frecuencias (value = lossyear; count = # celdas)
freq_buffer <- rename(freq_buffer, value = value, n_cells = count) %>% 
            select(value, n_cells) %>% 
            arrange(value)

# Área por celda (m²) y por clase (ha) — EPSG:32618 usa metros
px <- terra::res(buff_Forest_mask[[1]])               # c(xres, yres) en metros
px_m2 <- px[1] * px[2]
freq_buffer$area_ha <- freq_buffer$n_cells * px_m2 / 1e4

# Porcentaje dentro del muni (opcional, útil para el informe)
total_cells <- sum(freq_buffer$n_cells)
total_ha    <- total_cells * px_m2 / 1e4
freq_buffer$Porcentaje_cells <- 100 * freq_buffer$n_cells / total_cells
freq_buffer$Porcentaje_area  <- 100 * freq_buffer$area_ha  / total_ha


# Resultado Tabla Frecuencias
knitr::kable(freq_buffer, caption = "Tabla de Frecuencias de Bosques en el buffer del punto 1800110")
Tabla de Frecuencias de Bosques en el buffer del punto 1800110
value n_cells area_ha Porcentaje_cells Porcentaje_area
1 166 12.75824 1 1
2 16434 1263.06619 99 99
Código
# Número de píxeles de bosque ideam buffer
n_bosque_ideam_buffer <- sum(buff_Forest_mask[]==1, na.rm = TRUE)


print(glue("El número de píxeles de Bosque IDEAM en buffer del punto 1800110 es: {n_bosque_ideam_buffer}"))
El número de píxeles de Bosque IDEAM en buffer del punto 1800110 es: 166

3.1.3 (c) (5 pts) How many forest pixels were deforested in the year 2018 in the buffer? And in 2019?

Código
# (b) ¿Cuántos píxeles de bosque (IDEAM) hay en el buffer
#     usando la resolución/grilla de GFW?
forest_classes <- c(1)  # <-- ajusta según tu IDEAM
n_forest_pix <- global(buff_Forest_mask %in% forest_classes, "sum", na.rm=TRUE)[1,1]

# Área (ha) con la resolución de GFW
px_area_m2 <- prod(res(buff_Loss_mask))
forest_ha  <- n_forest_pix * px_area_m2 / 1e4

# (a) Pérdida 2018/2019 y bosque∧pérdida en el buffer -----------------------
mx <- suppressWarnings(global(buff_Loss_mask, "max", na.rm=TRUE)[1,1])
code2018 <- ifelse(is.finite(mx) && mx <= 30, 18, 2018)
code2019 <- ifelse(is.finite(mx) && mx <= 30, 19, 2019)

n_loss_2018 <- global(buff_Loss_mask == code2018, "sum", na.rm=TRUE)[1,1]
n_loss_2019 <- global(buff_Loss_mask == code2019, "sum", na.rm=TRUE)[1,1]

n_def_forest_2018 <- global((buff_Forest_mask %in% forest_classes) & (buff_Loss_mask == code2018),
                            "sum", na.rm=TRUE)[1,1]
n_def_forest_2019 <- global((buff_Forest_mask %in% forest_classes) & (buff_Loss_mask == code2019),
                            "sum", na.rm=TRUE)[1,1]


list(
  forest_pixels_in_buffer = n_forest_pix,
  forest_ha_in_buffer     = forest_ha,
  loss_pixels_2018        = n_loss_2018,
  loss_pixels_2019        = n_loss_2019,
  deforest_forest_pix_2018 = n_def_forest_2018,
  deforest_forest_pix_2019 = n_def_forest_2019
)
$forest_pixels_in_buffer
[1] 166

$forest_ha_in_buffer
[1] 12.75824

$loss_pixels_2018
[1] 11

$loss_pixels_2019
[1] 0

$deforest_forest_pix_2018
[1] 0

$deforest_forest_pix_2019
[1] 0
Código
# Construir la tabla
resumen <- data.frame(
  Año = c(2018, 2019),
  `Píxeles de bosque deforestado` = c(n_def_forest_2018, n_def_forest_2019),
  Hectáreas = (c(n_def_forest_2018, n_def_forest_2019) * px_area_m2) / 1e4
)

# Mostrar con kable
knitr::kable(
  resumen,
  caption = "Deforestación en el buffer (Bosque IDEAM ∧ Pérdida GFW)",
  digits = c(0, 0, 2),
  format.args = list(big.mark = ".", decimal.mark = ",", scientific = FALSE)
)
Deforestación en el buffer (Bosque IDEAM ∧ Pérdida GFW)
Año Píxeles.de.bosque.deforestado Hectáreas
2.018 0 0
2.019 0 0

3.2 5. (25 pts) For each of the 2km buffers, compute forest area in HECTARES, deforested forest area in hectares in 2019 (and 2018 for grad students)

3.2.1 (a) (15pts) What is the average deforested forest area in hectares in 2019 (and 2018 for grad students)

3.2.2 (b) (10pts) What is the average hectares of forest in the 668 buffers?

Código
# ------------------ Global variables ------------------
CRSg <- 32618            # EPSG numérico para sf
CRS  <- "epsg:32618"     # Formato texto (si lo llegas a necesitar)

# ------------------ Importar datos anonimizados ------------------
ecoturismo <- read.csv("D:/OneDrive - Universidad del rosario/001 MACHINE LEARNING/001 exams/Problem_Set_2/Inputs/Points_Ecoturism_anonimized.csv")

# Puntos -> sf (EPSG:4326) -> reproyectar a 32618
ecoturismo_shp <- ecoturismo %>%
  st_as_sf(coords = c("lon", "lat"), crs = "EPSG:4326") %>%
  st_transform(crs = CRSg)

mapview(ecoturismo_shp)
Código
# Buffers de 2 km y convertir a SpatVector
ecoturismo_buffer <- st_buffer(ecoturismo_shp, dist = 2000)
ecoturismo_buffer <- terra::vect(ecoturismo_buffer)
mapview(ecoturismo_buffer); plot(ecoturismo_buffer)

Código
# ------------------ Alinear Forest -> Loss (Loss es plantilla) ------------------
# Asumo que ya cargaste:
# Forest <- rast("Inputs/Raster_PartB/Consolidados/FOREST_Raster_Unido_Final_R.tif")
# Loss   <- rast("Inputs/Raster_PartB/Consolidados/LOSS_Raster_Unido_Final_R.tif")

if (!compareGeom(Forest, Loss, stopOnError = FALSE)) {
  if (crs(Forest) == crs(Loss)) {
    Forest <- resample(Forest, Loss, method = "near")
  } else {
    Forest <- project(Forest, Loss, method = "near")
  }
}

# ------------------ Recorte + máscara (TODOS los buffers juntos) ------------------
Eco_buff_Forest_mask <- mask(crop(Forest, ecoturismo_buffer), ecoturismo_buffer)

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          
Código
Eco_buff_Loss_mask   <- mask(crop(Loss,   ecoturismo_buffer), ecoturismo_buffer)

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          
Código
# (b) ¿Cuántos píxeles de bosque (IDEAM) hay en el conjunto de buffers?
forest_classes <- c(1)  # <-- AJUSTA según tu IDEAM
n_forest_pix <- global(Eco_buff_Forest_mask %in% forest_classes, "sum", na.rm = TRUE)[1,1]

|---------|---------|---------|---------|
=========================================
                                          
Código
# Área (ha) con la resolución de GFW (Loss)
px_area_m2 <- prod(res(Eco_buff_Loss_mask))
forest_ha  <- n_forest_pix * px_area_m2 / 1e4

# (a) Pérdida 2018/2019 y bosque∧pérdida en el conjunto de buffers
mx <- suppressWarnings(global(Eco_buff_Loss_mask, "max", na.rm = TRUE)[1,1])
code2018 <- ifelse(is.finite(mx) && mx <= 30, 18, 2018)
code2019 <- ifelse(is.finite(mx) && mx <= 30, 19, 2019)

n_loss_2018 <- global(Eco_buff_Loss_mask == code2018, "sum", na.rm = TRUE)[1,1]

|---------|---------|---------|---------|
=========================================
                                          
Código
n_loss_2019 <- global(Eco_buff_Loss_mask == code2019, "sum", na.rm = TRUE)[1,1]

|---------|---------|---------|---------|
=========================================
                                          
Código
n_def_forest_2018 <- global(
  (Eco_buff_Forest_mask %in% forest_classes) & (Eco_buff_Loss_mask == code2018),
  "sum", na.rm = TRUE
)[1,1]

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          
Código
n_def_forest_2019 <- global(
  (Eco_buff_Forest_mask %in% forest_classes) & (Eco_buff_Loss_mask == code2019),
  "sum", na.rm = TRUE
)[1,1]

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          

|---------|---------|---------|---------|
=========================================
                                          
Código
list(
  forest_pixels_in_buffer   = n_forest_pix,
  forest_ha_in_buffer       = forest_ha,
  loss_pixels_2018          = n_loss_2018,
  loss_pixels_2019          = n_loss_2019,
  deforest_forest_pix_2018  = n_def_forest_2018,
  deforest_forest_pix_2019  = n_def_forest_2019
)
$forest_pixels_in_buffer
[1] 618074

$forest_ha_in_buffer
[1] 47503.25

$loss_pixels_2018
[1] 4576

$loss_pixels_2019
[1] 6214

$deforest_forest_pix_2018
[1] 1095

$deforest_forest_pix_2019
[1] 703
Código
# Tabla kable (conjunto de buffers)
resumen <- data.frame(
  Año = c(2018, 2019),
  `Píxeles de bosque deforestado` = c(n_def_forest_2018, n_def_forest_2019),
  Hectáreas = (c(n_def_forest_2018, n_def_forest_2019) * px_area_m2) / 1e4
)

knitr::kable(
  resumen,
  caption = "Deforestación en el conjunto de buffers (Bosque IDEAM ∧ Pérdida GFW)",
  digits = c(0, 0, 2),
  format.args = list(big.mark = ".", decimal.mark = ",", scientific = FALSE)
)
Deforestación en el conjunto de buffers (Bosque IDEAM ∧ Pérdida GFW)
Año Píxeles.de.bosque.deforestado Hectáreas
2.018 1.095 84,16
2.019 703 54,03
Código
# ------------------ Totales POR CADA buffer (668 filas) ------------------
# --- Resumen para UN SOLO buffer (i = índice del buffer)
resumen_buffer_i <- function(i) {
  buf_i <- ecoturismo_buffer[i]

  # Recorte + máscara
  f_mask <- mask(crop(Forest, buf_i), buf_i)
  d_mask <- mask(crop(Loss,   buf_i), buf_i)

  # Conteos estilo clase (usando []):
  n_forest         <- sum(f_mask[] %in% forest_classes, na.rm = TRUE)
  n_loss2018       <- sum(d_mask[] == code2018, na.rm = TRUE)
  n_loss2019       <- sum(d_mask[] == code2019, na.rm = TRUE)
  n_forest_loss18  <- sum((f_mask[] %in% forest_classes) & (d_mask[] == code2018), na.rm = TRUE)
  n_forest_loss19  <- sum((f_mask[] %in% forest_classes) & (d_mask[] == code2019), na.rm = TRUE)

  tibble(
    id_buffer   = i,
    pix_forest  = n_forest,
    pix_loss2018 = n_loss2018,
    pix_loss2019 = n_loss2019,
    pix_bosque_def2018 = n_forest_loss18,
    pix_bosque_def2019 = n_forest_loss19,
    ha_forest   = n_forest        * px_area_m2 / 1e4,
    ha_def2018  = n_forest_loss18 * px_area_m2 / 1e4,
    ha_def2019  = n_forest_loss19 * px_area_m2 / 1e4
  )
}

# =================== 3) APLICAR A TODOS LOS BUFFERS ===================
res_por_buffer <- purrr::map_dfr(seq_len(nrow(ecoturismo_buffer)), resumen_buffer_i)

# =================== 4) TABLAS Y PROMEDIOS ===================
# Tabla por buffer (muestra primeras filas)
knitr::kable(
  head(res_por_buffer, 10),
  caption = "Primeras 10 filas: resumen por buffer",
  digits = 2,
  format.args = list(big.mark = ".", decimal.mark = ",", scientific = FALSE)
)
Primeras 10 filas: resumen por buffer
id_buffer pix_forest pix_loss2018 pix_loss2019 pix_bosque_def2018 pix_bosque_def2019 ha_forest ha_def2018 ha_def2019
1 106 11 0 0 0 8,15 0,00 0,00
2 166 0 0 0 0 12,76 0,00 0,00
3 1.677 6 0 0 0 128,89 0,00 0,00
4 7.643 40 17 25 12 587,42 1,92 0,92
5 7.207 21 16 12 11 553,91 0,92 0,85
6 106 2 0 0 0 8,15 0,00 0,00
7 166 11 0 0 0 12,76 0,00 0,00
8 1.176 27 15 0 0 90,38 0,00 0,00
9 5.994 14 1 7 0 460,68 0,54 0,00
10 7.473 13 1 8 0 574,35 0,61 0,00
Código
# Promedios solicitados
tabla_prom <- tibble(
  Métrica = c("Promedio ha de bosque (todos los buffers)",
              "Promedio ha de bosque deforestado en 2019",
              "Promedio ha de bosque deforestado en 2018"),
  Hectáreas = c(mean(res_por_buffer$ha_forest,  na.rm = TRUE),
                mean(res_por_buffer$ha_def2019, na.rm = TRUE),
                mean(res_por_buffer$ha_def2018, na.rm = TRUE))
)

knitr::kable(
  tabla_prom,
  caption = "Promedios por buffer (Bosque IDEAM ∧ Pérdida GFW)",
  digits = 2,
  format.args = list(big.mark = ".", decimal.mark = ",", scientific = FALSE)
)
Promedios por buffer (Bosque IDEAM ∧ Pérdida GFW)
Métrica Hectáreas
Promedio ha de bosque (todos los buffers) 189,62
Promedio ha de bosque deforestado en 2019 0,20
Promedio ha de bosque deforestado en 2018 0,26

3.3 6. (25 pts) Write the specification and run the corresponding regressions to compute the following:

Código
# Paquetes para estimación y tabla
pacman::p_load(estimatr, modelsummary, broom, kableExtra)
package 'data.table' successfully unpacked and MD5 sums checked
package 'modelsummary' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\jorge\AppData\Local\Temp\RtmpqIoNdx\downloaded_packages
Código
# ------------------ Dataset a nivel de buffer ------------------
# Asegurar que existe res_por_buffer; si no, detener:
stopifnot(exists("res_por_buffer"),
          all(c("id_buffer","ha_def2018","ha_def2019") %in% names(res_por_buffer)))

# id de buffer consistente con res_por_buffer (mismo orden que los puntos)
ecoturismo_shp <- ecoturismo_shp %>%
  dplyr::mutate(id_buffer = dplyr::row_number())

att_buf <- ecoturismo_shp %>% sf::st_drop_geometry()
df <- res_por_buffer %>%
  dplyr::left_join(att_buf, by = "id_buffer")

# Unir métricas por buffer con atributos de encuesta
df <- res_por_buffer %>%
  left_join(att_buf, by = "id_buffer")

# ------------------ Variable de tratamiento ------------------
# Autodetección; si tu columna tiene otro nombre, fuerza: trat_var <- "mi_col"
# posibles_nombres <- c("treat","treatment","trat","tratamiento","eco_treat","ecoturism","ecotourism","ECO","grupo","group")

trat_var <- "treat"
trat_var <- if (length(trat_var)==0) "treat" else trat_var[1]

if (!trat_var %in% names(df)) {
  stop(paste0(
    "No encuentro la columna de tratamiento. ",
    "Crea/renombra tu variable (0/1) y pon su nombre en 'trat_var'.\n",
    "Busqué: ", paste(posibles_nombres, collapse=", ")
  ))
}

# Normalizar a 0/1
df <- df %>%
  mutate(
    T_ecotur = case_when(
      .data[[trat_var]] %in% c(1, "1", TRUE, "TRUE", "T", "treated",
                               "Treatment", "Tratado", "SI", "Sí", "si") ~ 1L,
      .data[[trat_var]] %in% c(0, "0", FALSE, "FALSE", "F", "control", "Control", "No") ~ 0L,
      is.numeric(.data[[trat_var]]) ~ as.integer(.data[[trat_var]] != 0),
      TRUE ~ NA_integer_
    )
  )

# Un pequeño chequeo de tratados/control
df %>% count(T_ecotur, name = "n")
Código
# Reemplazar NAs en outcomes con 0 (si no hubo pérdida)
df <- df %>%
  mutate(
    ha_def2018 = replace_na(ha_def2018, 0),
    ha_def2019 = replace_na(ha_def2019, 0)
  )

# ------------------ Helper: SE robustos (cluster si existe 'pair') ------------------
fit_robust <- function(formula, data){
  if ("pair" %in% names(data)) {
    lm_robust(formula, data = data, clusters = data$pair)  # CR2 por cluster en 'pair'
  } else {
    lm_robust(formula, data = data, se_type = "HC2")       # robustos tipo HC2
  }
}

3.3.1 (a) (7 pts) What is the eco-tourism treatment effect on deforestation using a simple treatment-control RCT specification in 2019?

Código
# ------------------ RCT 2019 ------------------
# Y_i = α + β * T_i + ε_i

# (a) RCT simple (2019)
m_RCT_2019 <- fit_robust(ha_def2019 ~ T_ecotur, data = df)
summary(m_RCT_2019)

Call:
lm_robust(formula = formula, data = data, clusters = data$pair)

Standard error type:  CR2 

Coefficients:
            Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper    DF
(Intercept)   0.2596    0.08417   3.084 0.004097   0.0884  0.43086 33.11
T_ecotur     -0.1174    0.09306  -1.262 0.215581  -0.3064  0.07163 34.47

Multiple R-squared:  0.00958 ,  Adjusted R-squared:  0.008093 
F-statistic: 1.592 on 1 and 36 DF,  p-value: 0.2152
Código
# Extraer coeficiente y error estándar
coef_a <- tidy(m_RCT_2019) %>% filter(term == "T_ecotur") %>% select(estimate, std.error)
coef_a

3.3.2 (b) (Only graduate students) (7 pts) What is the eco-tourism treatment effect on deforestation using a Difference-in-Differences specification?

Código
# ------------------ DiD 2018–2019 ------------------
# Y_it = α + β1*T_i + β2*Post_t + β3*(T_i*Post_t) + ε_it
long <- df %>%
  select(id_buffer, T_ecotur, ha_def2018, ha_def2019) %>%
  pivot_longer(starts_with("ha_def20"),
               names_to = "anio", values_to = "ha_def") %>%
  mutate(year = if_else(anio == "ha_def2018", 2018L, 2019L),
         Post = as.integer(year == 2019L))

m_DiD <- fit_robust(ha_def ~ T_ecotur*Post, data = long)
# El efecto DiD es el coeficiente de la interacción: T_ecotur:Post
summary(m_DiD)

Call:
lm_robust(formula = formula, data = data, se_type = "HC2")

Standard error type:  HC2 

Coefficients:
              Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper   DF
(Intercept)    0.30431    0.04807  6.3298 3.347e-10   0.2100  0.39862 1332
T_ecotur      -0.09207    0.06164 -1.4936 1.355e-01  -0.2130  0.02886 1332
Post          -0.04467    0.06033 -0.7405 4.591e-01  -0.1630  0.07367 1332
T_ecotur:Post -0.02534    0.07728 -0.3279 7.431e-01  -0.1769  0.12627 1332

Multiple R-squared:  0.007339 , Adjusted R-squared:  0.005103 
F-statistic: 3.715 on 3 and 1332 DF,  p-value: 0.01119
Código
# Extraer coeficiente y error estándar
coef_b <- tidy(m_DiD) %>% filter(term == "T_ecotur:Post") %>% select(estimate, std.error)
coef_b

3.3.3 (c) (Only PhD and econ Master students) (7 pts) What is the eco-tourism treatment effect on deforestation using an ANCOVA specification in 2019?

Código
# ------------------ ANCOVA 2019 ------------------

# Y_2019 = α + β * T + θ * Y_2018 + ε
m_ANCOVA_2019 <- fit_robust(ha_def2019 ~ T_ecotur + ha_def2018, data = df)
summary(m_ANCOVA_2019)

Call:
lm_robust(formula = formula, data = data, clusters = data$pair)

Standard error type:  CR2 

Coefficients:
            Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper     DF
(Intercept)  0.13554    0.05968   2.271 0.030055  0.01394  0.25714 31.755
T_ecotur    -0.07986    0.07212  -1.107 0.275840 -0.22636  0.06664 34.427
ha_def2018   0.40778    0.11150   3.657 0.006652  0.14975  0.66581  7.842

Multiple R-squared:  0.2977 ,   Adjusted R-squared:  0.2956 
F-statistic: 7.012 on 2 and 36 DF,  p-value: 0.002682
Código
# Extraer coeficiente y error estándar
coef_c <- tidy(m_ANCOVA_2019) %>% filter(term == "T_ecotur") %>% select(estimate, std.error)
coef_c

3.3.4 (d) (Only PhD and econ Master students) (4 pts) Produce a nice paper-looking table with your three regression estimates.

Código
# ------------------ Tabla resumen ------------------
#| label: ps2-6d-kable
#| warning: false
#| message: false

pacman::p_load(dplyr, broom, knitr, kableExtra)

# --- Chequeos: necesitamos los objetos creados en 6(a)-(c) y los data.frames ---
stopifnot(
  exists("m_RCT_2019"), exists("m_DiD"), exists("m_ANCOVA_2019"),
  exists("df"), exists("long")
)

# Helpers de formato
sig <- function(p) ifelse(is.na(p),"", ifelse(p<0.01,"***", ifelse(p<0.05,"**", ifelse(p<0.10,"*",""))))
fmt <- function(x, d=3) formatC(x, format="f", digits=d, big.mark=".", decimal.mark=",")
grab <- function(tt, term){
  row <- broom::tidy(tt) |> dplyr::filter(term==term)
  if (nrow(row)==0) return(c(NA_real_, NA_real_, NA_real_))
  c(row$estimate[1], row$std.error[1], row$p.value[1])
}
cell_coef <- function(x) paste0(fmt(x[1]), sig(x[3]))         # devuelve character
cell_se   <- function(x) paste0("(", fmt(x[2]), ")")          # devuelve character

# Extraer (b, se, p) para cada modelo
b_RCT  <- grab(m_RCT_2019, "T_ecotur");       b0_RCT <- grab(m_RCT_2019, "(Intercept)")
b_DiD  <- grab(m_DiD,       "T_ecotur:Post"); b0_DiD <- grab(m_DiD, "(Intercept)")
b_ANC  <- grab(m_ANCOVA_2019,"T_ecotur");     b0_ANC <- grab(m_ANCOVA_2019, "(Intercept)")

# R2 (para reporte) y N; convertir a character
R2_RCT <- fmt(summary(lm(ha_def2019 ~ T_ecotur, data=df))$r.squared, 3)
R2_DiD <- fmt(summary(lm(ha_def ~ T_ecotur*Post, data=long))$r.squared, 3)
R2_ANC <- fmt(summary(lm(ha_def2019 ~ T_ecotur + ha_def2018, data=df))$r.squared, 3)
N_RCT  <- as.character(nobs(m_RCT_2019))
N_DiD  <- as.character(nobs(m_DiD))
N_ANC  <- as.character(nobs(m_ANCOVA_2019))

# Construir tabla (todas las columnas como character)
tabla_long <- tibble::tibble(
  Variable            = c("Tratamiento ecoturismo", "  EE (robustos)",
                          "Constante",              "  EE (robustos)",
                          "R²", "N"),
  `RCT 2019`          = c(cell_coef(b_RCT),  cell_se(b_RCT),
                          cell_coef(b0_RCT), cell_se(b0_RCT),
                          R2_RCT, N_RCT),
  `DiD 2018–2019`     = c(cell_coef(b_DiD),  cell_se(b_DiD),
                          cell_coef(b0_DiD), cell_se(b0_DiD),
                          R2_DiD, N_DiD),
  `ANCOVA 2019`       = c(cell_coef(b_ANC),  cell_se(b_ANC),
                          cell_coef(b0_ANC), cell_se(b0_ANC),
                          R2_ANC, N_ANC)
)

nota_cluster <- if ("pair" %in% names(df)) {
  "EE robustos. Si existe 'pair', se clusteriza por par."
} else {
  "EE robustos (HC2)."
}

knitr::kable(
  tabla_long,
  caption = paste0(
    "Efecto del ecoturismo sobre deforestación (hectáreas).\n",
    "Coeficiente y error estándar en filas separadas. Signif.: * p<0,10; ** p<0,05; *** p<0,01. ",
    nota_cluster
  ),
  align = "lccc", escape = TRUE, booktabs = TRUE
) %>% 
  kableExtra::kable_styling(full_width = FALSE, position = "center") %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, bold = TRUE)
Efecto del ecoturismo sobre deforestación (hectáreas). Coeficiente y error estándar en filas separadas. Signif.: * p<0,10; ** p<0,05; *** p<0,01. EE robustos. Si existe 'pair', se clusteriza por par.
Variable RCT 2019 DiD 2018–2019 ANCOVA 2019
Tratamiento ecoturismo 0,260*** 0,304*** 0,136**
EE (robustos) (0,084) (0,048) (0,060)
Constante 0,260*** 0,304*** 0,136**
EE (robustos) (0,084) (0,048) (0,060)
0,010 0,007 0,298
N 668 1336 668